* Session setup: interpret the script under Stata 8.2 rules and start clean.
version 8.2
capture clear
capture log close
* No --more-- pauses, so the script runs straight through.
set more off
* Memory allocation and maximum matrix size, written with the full option names.
set memory 5000m
set matsize 800


**** Merge with Nunn's data
* (see "Preparation Contract intensive.do")
local firm_file "Enterprise surveys_clean.dta"

use "`firm_file'"
sort d1a2
merge d1a2 using "Contract intensity.dta"
*codebook _merge
* NOTE(review): _merge is dropped without inspection, so unmatched rows from
* either file stay in the data - confirm this is intended.
drop _merge
* Drop CI if it is already present (capture ignores the error when it is not),
* then give the merged frac_lib_diff that name.
capture drop CI
rename frac_lib_diff CI
save "`firm_file'", replace



**** Now merge with value added, Sweden
local firm_file "Enterprise surveys_clean.dta"

use "`firm_file'"
sort d1a2
merge d1a2 using "JD_Sweden_4digit.dta"
*codebook _merge
*browse d1a2 _merge
* Not every observation finds a match.
drop _merge
save "`firm_file'", replace


**** Now merge with value added, Continent
* The key here is the pair (continent, d1a2).
local firm_file "Enterprise surveys_clean.dta"

use "`firm_file'"
sort continent d1a2
merge continent d1a2 using "JD_Continent_4digit.dta"
*codebook _merge
*browse d1a2 _merge
* Not every observation finds a match.
drop _merge
save "`firm_file'", replace


**** Now merge with Germany_Innovation
local firm_file "Enterprise surveys_clean.dta"

use "`firm_file'"
* Remove any DJ4g* variables already in the file before merging
* (capture ignores the error when none exist).
capture drop DJ4g*
sort d1a2
merge d1a2 using "JD_Germany_Innovation_4digit.dta"
*codebook _merge
*browse d1a2 _merge
* Not every observation finds a match.
drop _merge
save "`firm_file'", replace




*** Merge with judicial reforms, keyed on (countryname, year)
local firm_file "Enterprise surveys_clean.dta"

use "`firm_file'"
sort countryname year
merge countryname year using "Enterprise surveys_judicial_reforms_for merge with standardized dataset.dta"
*codebook _merge
drop _merge
save "`firm_file'", replace

**** Interaction of DJ (and CI) with the judicial reforms

use "Enterprise surveys_clean.dta"

**** Interactions:
* For every pair (measure, qualifier) create the product
*   reform_<qualifier>_post_<measure> = reform_<qualifier>_post * <measure>
* Both lists are fixed, so they are defined once, ahead of the loops.
local measures CI DJ4g1 DJ4g1m DJ4s1 DJ4c1 DJ4s1m DJ4c1m DJ4s2 DJ4c2 DJ4s2m DJ4c2m
local qualifiers dum1 dum2 dum3 dum4 dum_small1 dum_small2 dum_small3 dum_small4 db_dum

foreach measure of local measures {
	foreach qual of local qualifiers {
		generate reform_`qual'_post_`measure' = reform_`qual'_post * `measure'
	}
}



**** Other reforms
* Same product construction as for the judicial reforms, applied to the
* DJ4g<i> and DJ4g<i>m measures. The index range currently holds the single value 1.
forvalues i = 1/1 {
	local measures DJ4g`i' DJ4g`i'm
	local qualifiers edu_gdp edu_percap health_gdp health_percap jud2_gdp jud2_percap tax_gdp tax_percap confl_gdp confl_percap trans_gdp nrj_gdp bank_gdp indus_gdp trade_gdp debt_gdp corrup_gdp parlia_gdp elec_gdp media_gdp trans_percap nrj_percap bank_percap indus_percap trade_percap debt_percap corrup_percap parlia_percap elec_percap media_percap
	foreach measure of local measures {
		foreach qual of local qualifiers {
			generate reform_`qual'_post_`measure' = reform_`qual'_post * `measure'
		}
	}
}


**** Now work on va_perwo using the PPP exchange rate
* First pass; it is dropped and rebuilt below once the PPP factor has been corrected.
generate va_perwo_ppp = va_perwo / pppconversionfactorgdplcuperinte

*** Changes of currency:

/*
keep if dataset==1
****ranking by country:
collapse (mean) mean_va_perwo_ppp=va_perwo_ppp [pweight=wt], by(countryname year)
sort countryname year
browse countryname year mean_va_perwo_ppp
*/

*** Ghana divided its currency by 10,000 in 2007:
*https://fr.wikipedia.org/wiki/Cedi

* So: replace va_perwo_ppp=va_perwo_ppp/10000 if countryname=="Ghana"&year==2007
* Because va_perwo_ppp = va_perwo / PPP factor, that is the same as scaling the factor up:

replace pppconversionfactorgdplcuperinte = 10000 * pppconversionfactorgdplcuperinte if year == 2007 & countryname == "Ghana"


*** Angola:
*** Something weird going on in 2010:
*browse countryname year va_perwo pppconversionfactorgdplcuperinte reform_dum2 if countryname=="Angola"

*** But no change in currency: https://en.wikipedia.org/wiki/Angolan_kwanza
*** Leave it like this, and maybe drop Angola from the regressions.

*** Something weird going on in Poland 2013:
*https://en.wikipedia.org/wiki/Polish_z%C5%82oty
* Romania 2013
*https://en.wikipedia.org/wiki/Romanian_leu#Fourth_leu_.28RON.29:_2005-Present
* Slovak Republic 2009
* Slovenia 2013
* Zambia: probably in the old currency:
*https://en.wikipedia.org/wiki/Zambian_kwacha
* So: replace va_perwo_ppp=va_perwo_ppp/1000 if countryname=="Zambia"
* Again equivalent to scaling the PPP factor:

replace pppconversionfactorgdplcuperinte = 1000 * pppconversionfactorgdplcuperinte if countryname == "Zambia"

*** Conclusion: take out Angola and Romania. Poland, Slovak Republic, and Slovenia are already taken out because they are OECD countries.

*** Recalculate va_perwo_ppp with the corrected factor, and build the total value-added counterpart:
drop va_perwo_ppp
generate va_perwo_ppp = va_perwo / pppconversionfactorgdplcuperinte
generate va_ppp = va / pppconversionfactorgdplcuperinte

*hist va_perwo_ppp
*hist va_perwo_ppp if countryname!="Angola"&countryname!="Romania"&countryname!="Sweden"&countryname!="Estonia"&countryname!="Czech Republic"&countryname!="Slovak Republic"&countryname!="Slovenia"&countryname!="Hungary"&countryname!="Poland"

sort va_perwo_ppp
*browse countryname year va_perwo_ppp

*** Some extreme outliers remain.

*** Drop Angola and Romania (weird exchange rates) and the OECD countries.
* Written as "drop if any name matches", which removes exactly the rows that
* "keep if no name matches" would have removed.
drop if countryname == "Angola" | countryname == "Romania" | countryname == "Sweden" ///
	| countryname == "Estonia" | countryname == "Czech Republic" ///
	| countryname == "Slovak Republic" | countryname == "Slovenia" ///
	| countryname == "Hungary" | countryname == "Poland"

*keep if countryname!="Bosnia and Herzegovina"&countryname!="Croatia"&countryname!="Kosovo"&countryname!="Macedonia, FYR"&countryname!="Montenegro"&countryname!="Ukraine"

*keep if countryname!="Albania"&countryname!="Armenia"&countryname!="Azerbaijan"&countryname!="Belarus"&countryname!="Bulgaria"&countryname!="Georgia"&countryname!="Latvia"&countryname!="Lithuania"&countryname!="Moldova"

*USAID: keep if countryname!="Albania"&countryname!="Armenia"&countryname!="Bosnia and Herzegovina"&countryname!="Georgia"&&countryname!="Indonesia"&countryname!="Kosovo"&&countryname!="Moldova"&countryname!="Montenegro"


*** Trim 1%: va_perwo_ppp_trim1 is va_perwo_ppp with values below the 0.5th
*** centile or above the 99.5th centile set to missing.
centile va_perwo_ppp, centile(0.5 99.5)
* Keep the two cut-offs in temporary scalars: they retain the stored numeric
* values (no number-to-text macro expansion) and no longer depend on r()
* surviving the commands that follow.
tempname cut_lo cut_hi
scalar `cut_lo' = r(c_1)
scalar `cut_hi' = r(c_2)
gen va_perwo_ppp_trim1=va_perwo_ppp
replace va_perwo_ppp_trim1=. if va_perwo_ppp<`cut_lo' & va_perwo_ppp!=.
replace va_perwo_ppp_trim1=. if va_perwo_ppp>`cut_hi' & va_perwo_ppp!=.
*hist va_perwo_ppp_trim1
sum va_perwo_ppp_trim1


*** Other variables to convert from LCU with the PPP factor
* (candidates noted so far: n5_total n6 n7)

local lcu_vars n7_perwo
foreach v of local lcu_vars {
	generate `v'_ppp = `v' / pppconversionfactorgdplcuperinte
}
	
/*
****ranking by country:
collapse (mean) mean_va_perwo_ppp=va_perwo_ppp ///
(mean) mean_n5_total_perwo_ppp=n5_total_perwo_ppp (mean) mean_n6_perwo_ppp=n6_perwo_ppp (mean) mean_n7_perwo_ppp=n7_perwo_ppp /// 
(mean) mean_reform_dum2=reform_dum2 (mean) mean_h7a=h7a ///
[pweight=wt], by(countryname year)
sort countryname year
browse countryname year mean_va_perwo_ppp mean_n5_total_perwo_ppp mean_n6_perwo_ppp mean_n7_perwo_ppp mean_reform_dum2 mean_h7a


***There seems to be a problem for Turkey in 2008...
*/

*hist n5_total_perwo_ppp
*hist n5_total_perwo_ppp if countryname!="Turkey"
*hist n6_perwo_ppp
*hist n7_perwo_ppp

* For each listed variable, build <var>_ppp_trim1: a copy of <var>_ppp with
* values outside the [0.5th, 99.5th] centile range set to missing.
local list_variables n7_perwo 
tempname cut_lo cut_hi
foreach var of local list_variables{

	***trim 1%:
	centile `var'_ppp, centile(0.5 99.5)
	* Store the cut-offs in scalars rather than expanding r(c_#) as macros:
	* the numeric values are kept as stored and do not depend on r()
	* surviving the commands below.
	scalar `cut_lo' = r(c_1)
	scalar `cut_hi' = r(c_2)
	gen `var'_ppp_trim1=`var'_ppp
	replace `var'_ppp_trim1=. if `var'_ppp<`cut_lo' & `var'_ppp!=.
	replace `var'_ppp_trim1=. if `var'_ppp>`cut_hi' & `var'_ppp!=.

}


*** Create the indicator new_firm
* Initial idea: new_firm=1 if the firm was created after the first reform.
* However, there is no reform year for countries in which no reform took place.
* So: new_firm=1 if b5 is at or after year_baseline, 0 if b5 is before it.
*
* Both operands must be non-missing. Stata ranks missing values above every
* number, so an unguarded "b5<year_baseline" is true whenever year_baseline is
* missing, and "b5!=." lets extended missing codes (.a-.z) count as ">=".
* Observations with either value missing now get new_firm missing.
* NOTE(review): if b5 carries negative non-response codes they are still
* classified as 0 here - confirm whether they should be set to missing.
gen new_firm = (b5 >= year_baseline) if b5 < . & year_baseline < .

*browse b5 year_baseline

tab new_firm

* age_firm is year minus b5.
gen age_firm=year-b5
*browse year b5 age_firm
sort age_firm
* Discard impossible ages: negative, or 1954 and above.
* NOTE(review): the 1954 threshold presumably screens out ages produced by
* non-year codes in b5 - confirm its origin.
replace age_firm=. if age_firm<0|age_firm>=1954
sum age_firm
sum age_firm if dataset==1
centile age_firm, centile(50)


*browse b6b
* Same indicator built from b6b instead of b5, with the same missing-value guard.
gen new_firm2 = (b6b >= year_baseline) if b6b < . & year_baseline < .

	
	
* Interactive inspection only. Left commented out, like the other browse calls
* in this file, so that an unattended run is not interrupted before the save.
*browse va_perwo_ppp_trim1


* Write the final analysis dataset.
save "Enterprise surveys_judicial_reforms_standardized dataset.dta", replace







